#################################
# ANALYSIS: OVERALL INSTITUTIONAL PERSISTENCE
# 
# Part of replication of:
# Traditional Institutions in Africa, Past and Present
#
# Political Science Research and Methods
#
# By Clara Neupert-Wentz and Carl Müller-Crepon, 2023
#
##########################################



# PREPARE ANALYSIS ##############

## Define main data
data <- readRDS("data/main_data.rds")


## Main outcome
main.depvar <- c(`TPI Index` = "tpi_pc1")

all.depvar <- plot.vars <- c( "tpi_pc1",  "tni_level_ord", "leader_max",
                              "inst_mean", "leader_mean", 
                              "ties_mean","func_mean")
all.depvar.labs <- plot.labs <- c("TPI Index", 
                                  "TPI Level","Max Leader", 
                                  "Institution Index",  "Leader Index", 
                                  "State-ties Index", "Functions Index")
names(all.depvar) <- all.depvar.labs

## Sort version -- 3 indices
short.depvar  <- c( "tpi_pc1",  "tpi_centr_idx", "tpi_funct_idx")
short.depvar.labs <- c("TPI Index", "Political Centralization", "Functional Differentiation")
names(short.depvar) <- short.depvar.labs

## Main Treatment
main.treat <- c(`Jurisdictional Hierarchy (v33)` = "v33.num")


## Define main covariates

### Baseline
base.controls <- c("log(1 + pop.1880)", "log(poly.area)",
                   "log(coast.dist)", "log(river.dist)")
base.controls.lab <- c("Population","Area", "Distance to coast", "Distance to nav. river")
names(base.controls) <- base.controls.lab

### Ethnic vars from Murdock
ethnic.controls <- paste0("v",c(4,5,28),".num")
ethnic.controls.lab <- c("Reliance on agriculture",
                         "Reliance on pastoralism", "Intensity of agriculture")
names(ethnic.controls) <- ethnic.controls.lab

### Nature
nature.controls <- c("medianaltitude","medianslope","temperaturemean","evapotranspiration",  
                     "precipitation", "ppetratio","suit", "max.cash.crop",
                     "malaria.tsuit.max", "tsetse.max")
nature.controls.lab <- c("Altitude","Ruggedness","Temperature","Evapotranspiration",  
                          "Precipitation", "Precipitation/Evapotr.","Agr. suitability", "Cash crop suitability",
                          "Malaria environment", "Tsetse environment")
names(nature.controls) <- nature.controls.lab

### Combine into main three specifications
main.cov.ls <- list(c(),
                    c(base.controls),
                    c(base.controls,ethnic.controls),
                    c(base.controls,ethnic.controls,nature.controls))

## Cluster of SEs?
cluster.var <- "gid" ## Murdock group (across borders)


## Main fixed effects
main.fe <- "iso3c" ## Country


## Stargazer Output

### What to keep?
star.keep <- c(`Precol. centralization (v33)` = "v33.num")

### Output type
star.out <- "latex" ### At some point, this will become nicer

### Notes
if(star.out == "latex"){
  latex.notes.add <- function(width = .95, cluster = "ethnic group level"){
    paste0("\\parbox[t]{",width,"\\textwidth}{\\textit{Notes:} OLS models. Standard errors are clustered on the ethnic group level. 
           Significance codes: $^{*}$p$<$0.1; $^{**}$p$<$0.05; $^{***}$p$<$0.01.}")}
} else {
  latex.notes.add <- function(){""}
}


# SHORT OUTCOMES (Plot) ##########
# Figure 3, main text

# Setup 
stub <- "v33_contemp_shortdv"

# Estimate
model.ls <- unlist(lapply(short.depvar, function(dv){
  # Setup equations
  spec.ls <- lapply(main.cov.ls, function(cv){
    make_form(dv = dv, 
              expl = c(main.treat, cv),
              fe = main.fe, iv = "0", se = cluster.var)
  })
  # Estimate 
  lapply(spec.ls, function(spec){
    felm(spec, data = data)
  })
}), recursive = F)

# Get coefficients
coef.ls <- extract_coef(model.ls)

# Prepare plot
plot.df <- do.call(rbind, lapply(coef.ls, function(c){
  data.frame(outcome = c$dv,
             treat = main.treat, 
             coef = c$coef[main.treat, 1],
             se = diag(c$clustervcv[main.treat, main.treat, drop = F])^.5)
}))
plot.df$spec <- rep(1:length(main.cov.ls), length(short.depvar))
plot.df$outcome_label <- factor(rep(short.depvar.labs, each = length(main.cov.ls)), 
                                levels = short.depvar.labs, ordered = T)

# Plot prepare
g <- ggplot(plot.df, aes(y = coef, x = spec)) + 
  geom_point() + 
  geom_errorbar(aes(ymin = coef - 1.96*se, 
                    ymax = coef + 1.96*se), 
                width=0, lwd = .5) +
  geom_errorbar(aes(ymin = coef - 1.645*se, 
                    ymax = coef + 1.645*se), 
                width=0, lwd = 1) +
  geom_hline(yintercept = 0, color = "darkgrey", lty = 2) +  
  scale_x_continuous(breaks=c(1:length(main.cov.ls)), expand = c(.124, .124))  + 
  xlab("Specification") + ylab("Marginal effect of\njurisdictional hierarchy (v33)") +
  facet_wrap(~ outcome_label, nrow = 1) +
  theme_minimal() +
  labs(caption = "Specifications: (1) No controls; (2) Baseline; (3) Baseline + ethnic; (4) Baseline + ethnic + nature controls") +
  theme(panel.spacing = unit(1, "lines")) 

# Plot save
png(file.path(fig.path, paste0(stub, ".png")),  width = 6.5, height = 3,  unit = "in", res = 400)
par(mar = c(0,0,0,0))
print(g)
dev.off()



# ALL OUTCOMES (Plot) ##########
# Figure 4, main text

# Setup 
stub <- "v33_contemp_alldv"

# Estimate
model.ls <- unlist(lapply(plot.vars, function(dv){
  # Setup equations
  spec.ls <- lapply(main.cov.ls, function(cv){
    make_form(dv = dv, 
              expl = c(main.treat, cv),
              fe = main.fe, iv = "0", se = cluster.var)
  })
  # Estimate 
  lapply(spec.ls, function(spec){
    felm(spec, data = data)
  })
}), recursive = F)

# Get coefficients
coef.ls <- extract_coef(model.ls)

# Prepare plot
plot.df <- do.call(rbind, lapply(coef.ls, function(c){
  data.frame(outcome = c$dv,
             treat = main.treat, 
             coef = c$coef[main.treat, 1],
             se = diag(c$clustervcv[main.treat, main.treat, drop = F])^.5)
}))
plot.df$spec <- rep(1:length(main.cov.ls), length(plot.vars))
plot.df$outcome_label <- factor(rep(plot.labs, each = length(main.cov.ls)), levels = plot.labs, ordered = T)

# Plot prepare
g <- ggplot(plot.df, aes(y = coef, x = spec)) + 
  geom_point() + 
  geom_errorbar(aes(ymin = coef - 1.96*se, 
                    ymax = coef + 1.96*se), 
                width=0, lwd = .5) +
  geom_errorbar(aes(ymin = coef - 1.645*se, 
                    ymax = coef + 1.645*se), 
                width=0, lwd = 1) +
  geom_hline(yintercept = 0, color = "darkgrey", lty = 2) +  
  scale_x_continuous(breaks=c(1:length(main.cov.ls)), expand = c(.125, .125))  + 
  xlab("Specification") + ylab("Marginal effect of\njurisdictional hierarchy (v33)") +
  facet_wrap(~ outcome_label, nrow = 1) +
  theme_minimal() +
  labs(caption = "Specifications: (1) No controls; (2) Baseline; (3) Baseline + ethnic; (4) Baseline + ethnic + nature controls") +
  theme(panel.spacing = unit(1, "lines")) 

# Plot save
png(file.path(fig.path, paste0(stub, ".png")),  width = 8, height = 3,  unit = "in", res = 400)
par(mar = c(0,0,0,0))
print(g)
dev.off()





# Relation of TPI vars with each other ###
# Appendix Figure A1

# Select data to plot
plot.df <- data


### Reshape
long.df <- do.call(rbind, lapply(1:length(all.depvar), function(i){
  do.call(rbind, lapply(1:length(all.depvar), function(j){
    data.frame(i.lab = all.depvar.labs[i], 
               j.lab = all.depvar.labs[j],
               value.i = plot.df[,all.depvar[i]],
               value.j = plot.df[,all.depvar[j]],
               
               stringsAsFactors = F)
  }))
}))
long.df$i.lab <- factor(long.df$i.lab, levels =  unique(long.df$i.lab), ordered = T)
long.df$j.lab <- factor(long.df$j.lab, levels =  unique(long.df$j.lab), ordered = T)

# Plot Scatterplot matrix
set.seed(1)
g <- ggplot(long.df, aes(x = value.i, y = value.j)) +
  geom_jitter(height = .00, width = .05, alpha = .1) + 
  geom_smooth(method = "lm") +
  facet_grid(j.lab ~ i.lab) +
  scale_x_continuous(breaks = scales::pretty_breaks(n = 3), limits = c(0,1), name = NULL) + 
  scale_y_continuous(breaks = scales::pretty_breaks(n = 3), limits = c(0,1), name = NULL) + 
  facet_grid(j.lab ~ i.lab) +
  ggtitle("Relation among indeces of contemporary TPIs in Murdock groups") +
  theme_minimal()

png(file.path(fig.path, paste0("tradgovgrps_scatter.png")), width = 8, height = 8, unit = "in", res = 400)
print(g)
dev.off()




# SUMMARY STATS #################
# Appendix Table A1

# Summary stats Table 

## Variables
sum.vars <- c(plot.vars[1],
              "tpi_centr_idx", "tpi_funct_idx",
              plot.vars[-1])

## Prepare
sum.df <- data[,unlist(lapply(c(sum.vars,main.treat,  main.cov.ls[[length(main.cov.ls)]]), function(x){
  all.vars(as.formula(paste0("0 ~", x)))
}))]

## Print
fileConn <- file(file.path(tab.path, "persistence.sumtab.tex"))
writeLines(
  stargazer(sum.df,
    covariate.labels = names(c(all.depvar[1],
                               "Centralization Index" = "", "Functions Index" = "",all.depvar[-1],
                               main.treat,  main.cov.ls[[length(main.cov.ls)]])),
    title="Pre-colonial centralization and current TPIs: Summary Stats",
    omit.stat = c("rsq","res.dev","ser"),column.sep.width = "-5pt",align =T,
    type  = star.out), 
  fileConn)
close(fileConn)


# PRINCIPAL COMPONENT ANALYSIS ###
# Appendix Table A3

# Select data to transfor
pca.df <- data

# Which variables?
pca.vars <- tradinst.pca.vars
pca.labs <- c( "TPI Level",
               "Institution Index",  "Leader Index", "Max Leader", 
               "State-ties Index", "Functions Index" )

# Subset to non-missing
pca.df <- na.omit(pca.df[,pca.vars])
pca.df <- -1*pca.df

# Compute principal component
pca.res <- prcomp(pca.df, scale. = T, center = T)

# Summary
stub <- "grps.pca.sum"
fileConn<-file(file.path(tab.path, paste0(stub, ".tex")))
writeLines(
  pca_gazer(pca.res, 
            var.labs = pca.labs, 
            title = "PCA of group-level traditional institutions indicators", 
            label = stub, col.sep = "-5pt"), 
  fileConn)
close(fileConn)



# BASELINE MODEL ####
# Appendix Table A5

# Setup 
stub <- "v33_contemp_main"

# Setup equations
spec.ls <- lapply(main.cov.ls, function(cv){
  make_form(dv = main.depvar, 
            expl = c(main.treat, cv),
            fe = main.fe, iv = "0", se = cluster.var)
})

# Estimate 
model.ls <- lapply(spec.ls, function(spec){
  felm(spec, data = data)
})

# Prepare table
add.lines <- list(latex.addline("Baseline covariates", c("no", "yes", "yes", "yes")),
                  latex.addline("Ethnic covariates", c("no", "no", "yes", "yes")),
                  latex.addline("Nature covariates", c("no", "no", "no", "yes")),
                  latex.addline("Country (2016) FEs", c("yes", "yes", "yes", "yes")))

# Print
fileConn<-file(file.path(tab.path, paste0(stub, ".tex")))
writeLines(
  stargazer(model.ls,
            title="Jurisdictional hierarchy and current TPI Index",
            dep.var.caption = names(main.depvar),dep.var.labels.include = FALSE,
            keep= star.keep, covariate.labels = names(star.keep),
            notes.align = "l",label = stub,align =T,
            add.lines = add.lines, 
            digits = 3, intercept.top = T,intercept.bottom = F,
            omit.stat = c("rsq","res.dev","ser"),column.sep.width = "8pt",
            notes = latex.notes.add(), notes.label = "", notes.append = F,
            type  = star.out), 
  fileConn)
close(fileConn)

# NO FIXED EFFECTS ####
# Appendix Table A8

# Setup 
stub <- "v33_contemp_nofe"

# Setup equations
spec.ls <- lapply(main.cov.ls, function(cv){
  make_form(dv = main.depvar, 
            expl = c(main.treat, cv),
            fe = "0", iv = "0", se = cluster.var)
})

# Estimate 
model.ls <- lapply(spec.ls, function(spec){
  felm(spec, data = data)
})

# Prepare table
add.lines <- list(latex.addline("Baseline covariates", c("no", "yes", "yes", "yes")),
                  latex.addline("Ethnic covariates", c("no", "no", "yes", "yes")),
                  latex.addline("Nature covariates", c("no", "no", "no", "yes")),
                  latex.addline("Country (2016) FEs", c("no", "no", "no", "no")))

# Print
fileConn<-file(file.path(tab.path, paste0(stub, ".tex")))
writeLines(
  stargazer(model.ls,
            title="Jurisdictional hierarchy and current TPI Index: No fixed effects",
            dep.var.caption = names(main.depvar),dep.var.labels.include = FALSE,
            keep= star.keep, covariate.labels = names(star.keep),
            notes.align = "l",label = stub,align =T,
            add.lines = add.lines, 
            digits = 3, intercept.top = T,intercept.bottom = F,
            omit.stat = c("rsq","res.dev","ser"),column.sep.width = "8pt",
            notes = latex.notes.add(), notes.label = "", notes.append = F,
            type  = star.out), 
  fileConn)
close(fileConn)



